library(tidyverse) |> suppressPackageStartupMessages()
library(sf) |> suppressPackageStartupMessages()
library(mapview) |> suppressPackageStartupMessages()
library(units) |> suppressPackageStartupMessages()
library(concaveman) |> suppressPackageStartupMessages()
library(dplyr) |> suppressPackageStartupMessages()
library(spatstat) |> suppressPackageStartupMessages()
cb_palette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#CC79A7")Introduction
The US Civil War was one of the first wars in which the combatants attempted to leverage railroads. Controlling railroad locations and having easy access to them contributed to the Union’s ability to win the war, especially because most battles took place in the US South. Railroads can be used to transport troops and supplies, and in doing so they provide crucial support for an army that is not fighting on its home soil.
The question I would like to explore: Is the Union more likely to win battles that are located in close proximity to railroad junctions?
Null hypothesis: The likelihood that a Union victory occurs at a given point $s_i$ is directly proportional to the density of railroad junctions at that point.
Alternative hypothesis: There exists a railroad junction density threshold $p^*$ such that Union victories occur at locations $s_i^*$ with railroad junction density greater than $p^*$ more often than would be expected solely on the basis of this railroad junction density.
Broad Roadmap
- Determine where the railroad junctions are located (specifically in the US South).
- Determine where Civil War battles took place.
- Analyze distances from battle sites to railroad junctions.
- Compare intensity functions of railroad junctions and battle sites.
- Run an experiment that randomizes some element of battle sites location or railroad junction locations.
- Examine the possible spatial effect of railroad junctions on the results of battles, specifically battles that resulted in a Union victory.
- Is there anything we can test regarding the significance of the battle?
Exploratory Data Analysis (EDA)
junction_df <- read_csv("Junction.csv", show_col_types = FALSE)New names:
• `` -> `...4`
Warning: One or more parsing issues, call `problems()` on your data frame for details,
e.g.:
dat <- vroom(...)
problems(dat)
junction_df |> head()| X | Y | Name | …4 |
|---|---|---|---|
| -85.38300 | 32.64540 | Junction | NA |
| -81.96342 | 33.47022 | Junction | NA |
| -77.05674 | 38.81042 | Junction | NA |
| -90.04864 | 29.94394 | Junction | NA |
| -81.31636 | 34.24344 | Junction | NA |
| -76.79234 | 39.12509 | Junction | NA |
# Build point geometries from the junction table: the X/Y columns are read as
# EPSG:4326 coordinates and then re-projected to EPSG:3857.
junction_sf <- sf::st_transform(
  sf::st_as_sf(junction_df, coords = c("X", "Y"), crs = 4326),
  3857
)
mapview(junction_sf)battle_df <- read_csv("nps_battles.csv", show_col_types = FALSE)
# Translate the letter significance grade into a number (A = 4 ... D = 1).
# Grades outside A-D, and missing grades, come out as NA.
battle_df <- battle_df |>
  mutate(
    significance_numeric = unname(
      c(A = 4, B = 3, C = 2, D = 1)[as.character(significance)]
    )
  )
battle_df |> head()| cwsac_id | battle_name | start_date | end_date | theater_code | campaign_code | result | cwss_url | partof_cwss | operation | forces_text | casualties_text | results_text | preservation | significance | cwsac_url | other_names | partof_cwsac | cws2_url | study_area | core_area | potnr_boundary | partof_cws2 | interpretive_political | interpretive_commander_loss | interpretive_casualties | interpretive_tactics_strategy | interpretive_public_mind | interpretive_combat_arm | interpretive_military_firsts | interpretive_minority_troops | interpretive_economic | interpretive_archaelolgical | interpretive_logistics | interpretive_individual_bravery | interpretive_group_behavior | interpretive_joint_ops | interpretive_coop_armies | interpretive_naval | significance_jim | significance_ed | significance_bill | aad_url | battle_type | partof_aad | lat | long | state | strength_mean | strength_var | casualties_kwm_mean | casualties_kwm_var | comment | significance_numeric |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| AL001 | Day’s Gap | 1863-04-30 | 1863-04-30 | MW | MW63-03 | Union | http://www.nps.gov/civilwar/search-battles-detail.htm?battleCode=AL001 | TRUE | 0 | Men from 51st Indiana Infantry, 73rd Indiana Infantry, 3rd Ohio Infantry, 80th Illinois Infantry, and 1st Middle Tennessee Cavalry [US]; three regiments [CS] | 88 total (US 23; CS 65) | Union victory, although the raid ultimately failed. | III.3 | C | http://www.nps.gov/abpp/battles/al001.htm | Sand Mountain | TRUE | http://www.nps.gov/abpp/CWSII/CWSACReportAlabamaUpdate.pdf | 5410.35 | 1084.76 | 2933.02 | TRUE | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NA | D | C | http://aad.archives.gov/aad/record-detail.jsp?rid=320&dt=295 | Action | TRUE | 34.3096 | -87.0110 | AL | NA | NA | 88 | 0.1666667 | NA | 2 |
| AL002 | Athens | 1864-01-26 | 1864-01-26 | MW | MW64-02 | Union | http://www.nps.gov/civilwar/search-battles-detail.htm?battleCode=AL002 | TRUE | 0 | 9th Illinois Mounted Infantry [US]; 1st Alabama Cavalry [CS] | 50 total (US 20; CS 30) | Union victory (The Confederate force failed in its attempt to take Athens.) | IV.1 | D | http://www.nps.gov/abpp/battles/al002.htm | NA | TRUE | http://www.nps.gov/abpp/CWSII/CWSACReportAlabamaUpdate.pdf | 21919.82 | 2274.85 | 2998.41 | TRUE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NA | D | NA | http://aad.archives.gov/aad/record-detail.jsp?rid=314&dt=295 | Attack | TRUE | 34.8033 | -86.9722 | AL | 800 | 1666.667 | 50 | 16.6666667 | NA | 1 |
| AL003 | Mobile Bay | 1864-08-02 | 1864-08-23 | MW | MW64-09 | Union | http://www.nps.gov/civilwar/search-battles-detail.htm?battleCode=AL003 | TRUE | 0 | Farragut’s Fleet (14 wooden ships and 4 monitors) and U.S. army forces near Mobile [US]; Buchanan’s Flotilla (3 gunboats and an ironclad), Fort Morgan Garrison, Fort Gaines Garrison, and Fort Powell Garrison [CS] | 1,822 (US 322; CS 1,500) | Union victory | I.1 | A | http://www.nps.gov/abpp/battles/al003.htm | Passing of Forts Morgan and Gaines | TRUE | http://www.nps.gov/abpp/CWSII/CWSACReportAlabamaUpdate.pdf | 36916.74 | 16130.48 | 34927.42 | TRUE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | NA | NA | NA | http://aad.archives.gov/aad/record-detail.jsp?rid=323&dt=295 | Naval Operations | TRUE | 30.2410 | -88.0530 | AL | NA | NA | 1827 | 833.4166667 | NA | 4 |
| AL004 | Decatur | 1864-10-26 | 1864-10-29 | MW | MW64-13 | Union | http://www.nps.gov/civilwar/search-battles-detail.htm?battleCode=AL004 | TRUE | 0 | Garrison and other troops sent there (approx. 5,000 men) [US]; Army of Tennessee [CS] | 605 total (US 155; CS 450) | Union victory (Confederate forces could not cross the river.) | IV.2 | C | http://www.nps.gov/abpp/battles/al004.htm | NA | TRUE | http://www.nps.gov/abpp/CWSII/CWSACReportAlabamaUpdate.pdf | 14623.51 | 1148.87 | 0.00 | TRUE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NA | C | NA | http://aad.archives.gov/aad/record-detail.jsp?rid=3&dt=295 | Demonstration | TRUE | 34.6140 | -86.9860 | AL | NA | NA | 355 | 833.4166667 | NA | 2 |
| AL005 | Spanish Fort | 1865-03-27 | 1865-04-08 | MW | MW65-02 | Union | http://www.nps.gov/civilwar/search-battles-detail.htm?battleCode=AL005 | TRUE | 0 | XVI and XIII Corps [US]; Spanish Fort Garrison [CS] | 1,401 (US 657; CS 744) | Union victory | IV.2 | B | http://www.nps.gov/abpp/battles/al005.htm | NA | TRUE | http://www.nps.gov/abpp/CWSII/CWSACReportAlabamaUpdate.pdf | 3742.16 | 2282.27 | 0.00 | TRUE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | NA | B | C | http://aad.archives.gov/aad/record-detail.jsp?rid=2&dt=295 | Siege & Capture | TRUE | 30.6843 | -87.9164 | AL | NA | NA | 1398 | 0.1666667 | NA | 3 |
| AL006 | Fort Blakely | 1865-04-02 | 1865-04-09 | MW | MW65-02 | Union | http://www.nps.gov/civilwar/search-battles-detail.htm?battleCode=AL006 | TRUE | 0 | XIII and XVI Corps [US]; Fort Blakely Garrison [CS] | Total 4,475. April 9 only 3,529 (US 629; CS 2,900) | Union victory (Fort Blakely surrendered.) | III.1 | A | http://www.nps.gov/abpp/battles/al006.htm | NA | TRUE | http://www.nps.gov/abpp/CWSII/CWSACReportAlabamaUpdate.pdf | 6061.92 | 3743.28 | 4640.71 | TRUE | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 1 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | 0 | B | A | B | http://aad.archives.gov/aad/record-detail.jsp?rid=1&dt=295 | Siege & Capture | TRUE | 30.7424 | -87.9270 | AL | 20000 | 166666.667 | 4475 | 833.4166667 | NA | 4 |
# Build point geometries from the battle table: long/lat are read as
# EPSG:4326 coordinates and then re-projected to EPSG:3857.
battle_sf <- sf::st_transform(
  sf::st_as_sf(battle_df, coords = c("long", "lat"), crs = 4326),
  3857
)
mapview(battle_sf,zcol="result", label="result")battle_sf_significance <- battle_df |> sf::st_as_sf(
coords=c("long","lat"),
crs=4326
) |> sf::st_transform(3857)
mapview(battle_sf_significance,zcol="result", cex="significance_numeric", label="result")Hypothesis Testing
Let’s draw a random sample of N battles (each sampled battle keeps its recorded outcome) and map them by result.
# Draw N battles at random (their recorded results are left untouched) and
# attach a display label to every sampled battle.
N <- 300
battle_sf_sample <- battle_df |>
  sf::st_as_sf(coords = c("long", "lat"), crs = 4326) |>
  sf::st_transform(3857) |>
  sample_n(N) |>
  mutate(label = paste0("Result: ", result))
mapview(battle_sf_sample,zcol="result", label="result")Making a hull for the junctions.
# Concave hull around the battle sites, buffered by 5 CRS units. It serves as
# the observation window when the point patterns are built further down.
hull_sf <- sf::st_buffer(
  concaveman::concaveman(battle_sf, concavity = 3),
  dist = 5
)
plot(hull_sf)
Making a ppp object for the junctions.
junction_sfc <- junction_sf |> sf::st_as_sfc()
junction_ppp <- as.ppp(junction_sfc, W=as.owin(hull_sf))Warning: 5 points were rejected as lying outside the specified window
plot(junction_ppp)Warning in plot.ppp(junction_ppp): 5 illegal points also plotted

battle_sfc <- battle_sf |> sf::st_as_sfc()
battle_ppp <- as.ppp(battle_sfc, W=as.owin(hull_sf))Warning: data contain duplicated points
plot(battle_ppp)
Intensity functions
# Estimate the intensity surface of the junction point pattern with density()
# (default settings) and plot it.
junction_int <- density(junction_ppp)
plot(junction_int)
# Same estimate for the battle point pattern.
battle_int <- density(battle_ppp)
plot(battle_int)
Observed quadrat counts
# Partition the study region into equal-probability classes of junction
# intensity (terciles: Low / Medium / High).
num_regions <- 3
region_labels <- c("Low", "Medium", "High")
junction_vals <- junction_int

# Break points at the 0, 1/3, 2/3 and 1 quantiles of the intensity image.
junction_quant <- quantile(
  junction_vals,
  probs = (0:num_regions) / num_regions,
  na.rm = TRUE
)

# include.lowest = TRUE keeps the pixel(s) holding the minimum intensity in
# the "Low" class. Without it cut() uses (lo, hi] intervals, so the minimum
# value (equal to the first break) is mapped to NA and silently falls outside
# the tessellation.
junction_cut <- cut(
  junction_vals,
  breaks = junction_quant,
  labels = region_labels,
  include.lowest = TRUE
)

# Tessellation whose tiles are the intensity classes.
junction_areas <- tess(image = junction_cut)
plot(junction_areas)
# Point pattern of the battles won by the Union, placed on the same window as
# the junction pattern.
union_sf <- filter(battle_sf, result == "Union")
union_sfc <- sf::st_as_sfc(union_sf)
union_ppp <- as.ppp(union_sfc, Window(junction_ppp))
plot(union_ppp)

# Observed number of Union victories in each junction-intensity region.
obs_union_counts <- setNames(
  as.vector(quadratcount(union_ppp, tess = junction_areas)),
  region_labels
)
obs_union_counts Low Medium High
9 41 144
Monte Carlo pipeline
set.seed(1007)

# Simulate one point pattern under the null hypothesis.
#
# Args:
#   n: number of points to draw. Defaults to the number of Union victories.
#   f: density handed to rpoint(). Defaults to the junction intensity image,
#      so points are placed in proportion to junction intensity.
#
# Returns:
#   The simulated point pattern produced by spatstat.random::rpoint().
gen_sim_ppp <- function(n = nrow(union_sf), f = junction_int) {
  spatstat.random::rpoint(n = n, f = f)
}

sim_union_ppp <- gen_sim_ppp()
plot(sim_union_ppp)
# Count the points of `sim_ppp` that fall in each tile of `junction_areas`.
#
# Args:
#   sim_ppp: a point pattern to be counted.
#
# Returns:
#   A plain vector of counts named after `region_labels`.
compute_quadrat_counts <- function(sim_ppp) {
  tile_counts <- as.vector(quadratcount(sim_ppp, tess = junction_areas))
  setNames(tile_counts, region_labels)
}
compute_quadrat_counts(sim_union_ppp) Low Medium High
0 24 170
set.seed(1007)

# Simulate several point patterns under the null hypothesis.
#
# Args:
#   num_sims: number of simulated patterns to generate.
#   n: number of points per pattern. Defaults to the number of Union
#      victories.
#   f: density handed to rpoint(). Defaults to the junction intensity image.
#
# Returns:
#   The result of spatstat.random::rpoint() called with nsim = num_sims.
gen_sims_ppp <- function(num_sims, n = nrow(union_sf), f = junction_int) {
  spatstat.random::rpoint(n = n, f = f, nsim = num_sims)
}

# 999 simulated patterns, each reduced to its per-region counts.
full_sims_list <- gen_sims_ppp(num_sims = 999)
full_sim_area_counts <- lapply(
  X = full_sims_list,
  FUN = compute_quadrat_counts
)
full_count_df <- as_tibble(full_sim_area_counts) |> t() |> as_tibble()Warning: The `x` argument of `as_tibble.matrix()` must have unique column names if
`.name_repair` is omitted as of tibble 2.0.0.
ℹ Using compatibility `.name_repair`.
colnames(full_count_df) <- region_labels
full_count_df |> head()| Low | Medium | High |
|---|---|---|
| 0 | 24 | 170 |
| 2 | 28 | 164 |
| 0 | 25 | 169 |
| 1 | 20 | 173 |
| 1 | 30 | 163 |
| 2 | 27 | 165 |
Test statistic
# Append the observed counts to the simulated counts so that the observed
# pattern is part of the Monte Carlo reference set.
mc_df <- bind_rows(full_count_df, obs_union_counts)
# Density of the simulated "High" counts, with the observed count marked by a
# dashed vertical line.
full_count_df |> ggplot(aes(x=High)) +
geom_density(fill=cb_palette[2], alpha=0.5) +
geom_vline(xintercept = obs_union_counts["High"], linetype="dashed", color=cb_palette[1])
# Monte Carlo p-value: share of rows (simulated + observed) whose "High" count
# is at most the observed one.
# NOTE(review): `<=` measures the lower tail (as few or fewer points in "High"
# than observed). The alternative hypothesis stated in the introduction is
# about victories occurring *more* often in high-density areas, which would be
# the upper tail (`>=`) -- confirm which tail is intended.
p_value_df <- mc_df[mc_df$High <= obs_union_counts["High"],]
p_value <- nrow(p_value_df) / nrow(mc_df)
p_value[1] 0.001
What about Confederate victories?
# Battles recorded as Confederate victories, and their bare geometries.
confed_sf <- filter(battle_sf, result == "Confederate")
confed_sfc <- sf::st_as_sfc(confed_sf)
confed_ppp <- as.ppp(confed_sfc, Window(junction_ppp))Warning: data contain duplicated points
plot(confed_ppp)

# Observed number of Confederate victories in each junction-intensity region.
obs_confed_counts <- setNames(
  as.vector(quadratcount(confed_ppp, tess = junction_areas)),
  region_labels
)
obs_confed_counts Low Medium High
1 27 92
Monte Carlo pipeline
set.seed(1007)

# Simulate one point pattern under the null hypothesis.
#
# Args:
#   n: number of points to draw. Defaults to the number of Confederate
#      victories.
#   f: density handed to rpoint(). Defaults to the junction intensity image,
#      so points are placed in proportion to junction intensity.
#
# Returns:
#   The simulated point pattern produced by spatstat.random::rpoint().
gen_sim_ppp <- function(n = nrow(confed_sf), f = junction_int) {
  spatstat.random::rpoint(n = n, f = f)
}

sim_confed_ppp <- gen_sim_ppp()
plot(sim_confed_ppp)
# Count the points of `sim_ppp` that fall in each tile of `junction_areas`.
#
# Args:
#   sim_ppp: a point pattern to be counted.
#
# Returns:
#   A plain vector of counts named after `region_labels`.
compute_quadrat_counts <- function(sim_ppp) {
  tile_counts <- as.vector(quadratcount(sim_ppp, tess = junction_areas))
  setNames(tile_counts, region_labels)
}
compute_quadrat_counts(sim_confed_ppp) Low Medium High
0 18 102
set.seed(1007)

# Simulate several point patterns under the null hypothesis.
#
# Args:
#   num_sims: number of simulated patterns to generate.
#   n: number of points per pattern. Defaults to the number of Confederate
#      victories.
#   f: density handed to rpoint(). Defaults to the junction intensity image.
#
# Returns:
#   The result of spatstat.random::rpoint() called with nsim = num_sims.
gen_sims_ppp <- function(num_sims, n = nrow(confed_sf), f = junction_int) {
  spatstat.random::rpoint(n = n, f = f, nsim = num_sims)
}

# 999 simulated patterns, each reduced to its per-region counts.
full_sims_list <- gen_sims_ppp(num_sims = 999)
full_sim_area_counts <- lapply(
  X = full_sims_list,
  FUN = compute_quadrat_counts
)
full_count_df <- as_tibble(full_sim_area_counts) |> t() |> as_tibble()colnames(full_count_df) <- region_labels
full_count_df |> head()| Low | Medium | High |
|---|---|---|
| 0 | 18 | 102 |
| 0 | 10 | 110 |
| 1 | 16 | 103 |
| 0 | 14 | 106 |
| 0 | 14 | 106 |
| 1 | 12 | 107 |
Test statistic
# Append the observed counts to the simulated counts so that the observed
# pattern is part of the Monte Carlo reference set.
mc_df <- bind_rows(full_count_df, obs_confed_counts)
# Density of the simulated "High" counts, with the observed count marked by a
# dashed vertical line.
full_count_df |> ggplot(aes(x=High)) +
geom_density(fill=cb_palette[2], alpha=0.5) +
geom_vline(xintercept = obs_confed_counts["High"], linetype="dashed", color=cb_palette[1])
# Monte Carlo p-value: share of rows (simulated + observed) whose "High" count
# is at most the observed one.
# NOTE(review): `<=` measures the lower tail only (as few or fewer points in
# "High" than observed) -- confirm that this is the intended direction.
p_value_df <- mc_df[mc_df$High <= obs_confed_counts["High"],]
p_value <- nrow(p_value_df) / nrow(mc_df)
p_value[1] 0.003